clear
set more off
version 13.1

*********************************
* Import and save cash rent data
*********************************
import delimited using "..\dataRaw\cash_rent_08-13.csv", clear

rename stateansi stfips
rename countyansi cofips 
sort stfips cofips 

rename value cash_rent
* Drop observations that are combinations of counties
drop if cofips==.
* keep all of the fips codes
merge m:1 stfips cofips using "..\dataRaw\fips&district_codes", nogen keep(match master)

* Generate variable = 1 if irrigated cash rent
tab dataitem, gen(irr)
drop dataitem irr2
rename irr1 irr

* drop unnecessary variables from data
keep stfips cofips stcofips state county district year irr cash_rent
sort stcofips year

destring cash_rent, replace ignore(",")

save "..\dataRaw\cash_rent_08-13", replace


******************
* Import LRR data
******************
import delimited using "..\dataRaw\lrr_county_overlay.csv", clear
keep stcofips lrrsym lrr_name shape_area
drop if lrrsym==""
* determine the LRR with greatest area for each county
bys stcofips: egen max_area=max(shape_area)
bys stcofips: drop if shape_area<max_area
duplicates report stcofips
save "..\dataRaw\lrr_county_overlay", replace

* Export as csv to use in ArcGIS
keep stcofips lrrsym
export delimited "..\maps\lrr_county_overlay.csv", replace


******************
* Import ethanol data
******************
import delimited using "..\dataRaw\ethanol_data.csv", clear
ren state_fips stfips
ren county_fips cofips
keep if year==2013
collapse (sum) operating_production, by(stfips cofips)
ren operating_production ethanol_prod
save "..\dataRaw\ethanol_production2013", replace

*********************************
* Import winter wheat acres data
*********************************
import delimited using "..\dataRaw\winter_wheat_acres.csv", clear

rename stateansi stfips
rename countyansi cofips 
sort stfips cofips 

rename value winter_wheat_acres
* The "D" flag denotes that they withheld data to avoid disclosing data
* for individual produers
destring winter_wheat_acres, replace ignore("," "(D)" " (D)")

keep stfips cofips year winter_wheat_acres
drop if cofips==.
reshape wide winter_wheat_acres, i(stfips cofips) j(year)

egen winter_wheat_acres=rowmean(winter_wheat_acres2012 winter_wheat_acres2007 winter_wheat_acres2002)
drop winter_wheat_acres2012 winter_wheat_acres2007 winter_wheat_acres2002
replace winter_wheat_acres=0 if winter_wheat_acres==.

save "..\dataRaw\winter_wheat_acres", replace

*---------------------------------------------------
* Import Urban influence data
*---------------------------------------------------
import excel using "..\dataRaw\UrbanInfluenceCodes2013.xls", clear sheet("Urban Influence Codes 2013") first
ren FIPS stcofips
destring stcofips, replace
keep stcofips UIC_2013
gen large_metro=(UIC_2013==1)
gen small_metro=(UIC_2013==2)
gen micro_adj_large=(UIC_2013==3)
gen noncore_adj_large=(UIC_2013==4)
gen micro_adj_small=(UIC_2013==5)
gen noncore_adj_small=(UIC_2013==6)
gen noncore_adj_small_notown=(UIC_2013==7)
gen micro_noadj=(UIC_2013==8)
gen noncore_adj_micro=(UIC_2013==9)
gen noncore_adj_micro_notown=(UIC_2013==10)
gen noncore_noadj=(UIC_2013==11)
gen noncore_noadj_notown=(UIC_2013==12)


gen metro=large_metro + small_metro
gen micro_adj=micro_adj_large + micro_adj_small 
gen noncore_adj=noncore_adj_large + noncore_adj_small + noncore_adj_small_notown
gen micro_noadj_noncore= micro_noadj + noncore_adj_micro + noncore_adj_micro_notown
gen noncore_noadj_all=noncore_noadj + noncore_noadj_notown
drop UIC_2013
save "..\dataRaw\UrbanInfluenceCodes2013", replace

*---------------------------------------------------
* Subsidy data
*---------------------------------------------------
import excel using "..\dataRaw\farmprogramatlasdata.xls", sheet("DCP") first clear
rename FIPStxt stcofips
sort stcofips
destring stcofips, replace
save "..\dataRaw\farmprogram_dcp", replace

*---------------------------------------------------
* Start with cash rent data and only keep
* nonirrigated data
*---------------------------------------------------
use "..\dataRaw\cash_rent_08-13", clear
gen irryear_string="irr2008" if irr==1 & year==2008
replace irryear_string="_irr2009" if irr==1 & year==2009
replace irryear_string="_irr2010" if irr==1 & year==2010
replace irryear_string="_irr2011" if irr==1 & year==2011
replace irryear_string="_irr2012" if irr==1 & year==2012
replace irryear_string="_irr2013" if irr==1 & year==2013
replace irryear_string="_non2008" if irr==0 & year==2008
replace irryear_string="_non2009" if irr==0 & year==2009
replace irryear_string="_non2010" if irr==0 & year==2010
replace irryear_string="_non2011" if irr==0 & year==2011
replace irryear_string="_non2012" if irr==0 & year==2012
replace irryear_string="_non2013" if irr==0 & year==2013
drop year irr
reshape wide cash_rent, i(state stfips county cofips district stcofips) j(irryear_string) string


* keep all counties, even those with missing rent data
merge 1:1 stfips cofips using "..\dataRaw\fips&district_codes", nogen keep(master match using)
* Drop if state level
drop if cofips==0
* Drop Hawaii
drop if state=="HAWAII"


*---------------------------------------------------
* Merge Acres Harvested, Failed, and Fallowed data
*---------------------------------------------------
* All of these files are reduced to a single row per year.
* The cash rent dataset is a panel though.
* keep all of the observations with rent data only
* and keep observations with acres data only
merge 1:1 stfips cofips using "..\temp\acresHarvested_county", nogen
merge 1:1 stfips cofips using "..\temp\FallowIdleFailed_county", nogen

*---------------------------------------------------
* Merge climate data
*---------------------------------------------------
drop if stcofips==.
merge 1:1 stcofips using "..\temp\PRISM_county_climate83-12_GrowSeason", nogen keep(match master)
* Does not have climate data for a few observations
* But this next code shows that these counties with missing climate data
* rarely have data for nonirrigated cash rent or acres harvested of the major commodities.
* The merge problem likely occurs due to changes in definitions of county boundaries over time.
summ cash_rent_non2013 acresh* if tavg==.
* Drop if missing climate data
drop if tavg==.

merge 1:1 stcofips using "..\temp\PRISM_county_climate83-12_OffSeason", nogen keep(match master)

* ET and precipitation are measured in mm
* convert ET and precipitation to inches 
local varlist ppt ET0 ET0_elev ET0_Har ETc netppt0 netpptc  ///
		aet_usgs aet aet_corn water_deficit_usgs water_deficit water_deficit_corn water_surplus_usgs water_surplus water_surplus_corn ///
		soil_moisture_usgs soil_moisture soil_moisture_corn soil_moist_deficit_usgs soil_moist_deficit soil_moist_deficit_corn	///
		pptOffSeason ET0OffSeason ET0_elevOffSeason ET0_HarOffSeason ETcOffSeason water_deficitOffSeason water_deficit_usgsOffSeason ///
		water_deficit_cornOffSeason water_surplusOffSeason water_surplus_usgsOffSeason water_surplus_cornOffSeason soil_moistureOffSeason ///
		soil_moisture_usgsOffSeason soil_moisture_cornOffSeason soil_moist_deficitOffSeason soil_moist_deficit_cornOffSeason
foreach var in `varlist' {
	qui replace `var'=0.0393701*`var'
}

*---------------------------------------------------
* Merge soils data
*---------------------------------------------------
* The code to generate the soils data is in a different folder
* because the raw soils data are quite large
merge 1:1 stfips cofips using "..\dataRaw\county_soils_usa"
* A few counties are missing soils data
* But very few counties have soils data, but were otherwise missing from the data
* Drop counties with soils data, but no other data
tab state if _merge==1
tab stfips if _merge==2
drop if _merge==2
drop _merge


*---------------------------------------------------
* Merge LRR data
*---------------------------------------------------
merge 1:1 stcofips using "..\dataRaw\lrr_county_overlay", nogen keep(match master)
drop shape_area max_area


*---------------------------------------------------
* Climate Scenarios data
*---------------------------------------------------
merge 1:1 stcofips using "..\dataAnalysis\climate_scenarios", nogen keep(match master)

*---------------------------------------------------
* Ethanol production data
*---------------------------------------------------
merge 1:1 stfips cofips using "..\dataRaw\ethanol_production2013", nogen keep(match master)
replace ethanol_prod=0 if ethanol_prod==.

*---------------------------------------------------
* Urban influence data
*---------------------------------------------------
merge 1:1 stcofips using "..\dataRaw\UrbanInfluenceCodes2013", nogen keep(match master)

*---------------------------------------------------
* Subsidy data
*---------------------------------------------------
merge 1:1 stcofips using "..\dataRaw\farmprogram_dcp", nogen keep(match master)

*---------------------------------------------------
* Subsidy data
*---------------------------------------------------
merge 1:1 stfips cofips using "..\dataRaw\winter_wheat_acres", nogen keep(match master)

*---------------------------------------------------
* Create variables for regression analysis
*---------------------------------------------------
gen clay_perc= claytotal/(sandtotal+silttotal+claytotal)*100
gen silt_perc= silttotal/(sandtotal+silttotal+claytotal)*100
gen sand_perc=100-clay_perc-silt_perc
gen best_soil=0
replace best_soil= nirrcapcl1 + nirrcapcl2
replace best_soil=. if nirrcapcl1==.
gen hydgrp_A_AD=hydgrp_A + hydgrp_AD
gen hydgrp_B_BD=hydgrp_B + hydgrp_BD
gen hydgrp_C_CD=hydgrp_C + hydgrp_CD
drop hydgrp_A hydgrp_AD hydgrp_B hydgrp_BD hydgrp_C hydgrp_CD

gen ln_ksat_lep=ln(ksat*lep)
gen ln_ec=ln(ec)
gen ln_gypsum=ln(gypsum)
gen ln_sar=ln(sar)
gen ln_rootznemc=ln(rootznemc)
gen ln_om=ln(om)
gen ln_slope=ln(slope)

gen rootzn_low=(rootznemc<100)
replace rootzn_low=. if rootznemc==.
gen rootzn_med=(rootznemc>=100 & rootznemc<140)
replace rootzn_med=. if rootznemc==.

replace drain_well=drain_well+drain_moderate
replace drain_excessive=drain_excessive + drain_some_excess
replace drain_poorly=drain_poorly + drain_some_poorly + drain_very_poorly
drop drain_moderate drain_some_excess drain_some_poorly drain_very_poorly
gen tax_other=tax_andisols + tax_aridisols + tax_entisols + tax_gelisols + tax_histosols ///
		+ tax_inceptisols + tax_spodosols + tax_ultisols + tax_vertisols

* 5 Broader texture groups
* Source: http://www.agry.purdue.edu/soils_judging/new_manual/Ch2-texture.html
gen text_sandy=sand + loamy_sand
gen text_mod_sandy= sandy_loam
gen text_medium= silt + silt_loam + loam
gen text_mod_clay= clay_loam + sandy_clay_loam + silty_clay_loam
gen text_clay= sandy_clay + silty_clay + clay	

* convert aws to inches
replace rootznaws=rootznaws*0.0393701

gen ln_ksat=ln(ksat)

* Convert to hundreds of degree days		
gen gdd=(dday10C-dday30C)/100

* Convert from grams/m^2 to kg/m^2
replace soc0_150=soc0_150*0.001

* unique identifier for agricultural districts
egen district_grp=group(stfips district)


label variable netpptc "Net Precipitation" 
label variable ppt "Precipitation" 
label variable ETc "ET" 
label variable vpd "VPD" 
label variable gdd "GDD"
label variable dday30C "EDD"
label variable dday32C "EDD"
label variable dday34C "EDD" 
label variable rootznaws "Available Water Storage" 
label variable ln_slope "Log of Slope" 
label variable cec7 "CEC" 
label variable ph_less6 "pH Less than 6" 
label variable ln_ksat "Log of Permeability" 
label variable clay_perc "Percent Clay" 
label variable silt_perc "Percent Silt" 

label variable pptOffSeason "Off Season Precipitation" 


egen croplandNon_acres=rowtotal(acresh_Non2012 acres_Failed2012 acres_Fallow2012), missing
* Use 2002-2007 average irrigated acreage if irrigated acreage in 2012 is missing
* If still missing, then replace with 0
egen avg_irrig_acres=rowmean(acresh_Irr2002 acresh_Irr2007)
replace acresh_Irr2012=avg_irrig_acres if acresh_Irr2012==.
replace acresh_Irr2012=0 if acresh_Irr2012==.
drop avg_irrig_acres

keep if lrrsym=="M" | lrrsym=="H" | lrrsym=="F" | lrrsym=="G" | lrrsym=="O" | lrrsym=="L" // | lrrsym=="K" 
* Drop New York
drop if stfips==36
* Oklahoma stfips=40
* Texas stfips=48
* New Mexico stfips=35
*drop if stfips==40 | stfips==48 | stfips==35

* Combine some states for clustering
gen stfips_clust=stfips
* Combine Colorad, Wyoming, and Montana
replace stfips_clust=8 if stfips==56 | stfips==30
* Combine New Mexico with Texas
replace stfips_clust=48 if stfips==35
* Combine Tennessee with Missouri
replace stfips_clust=29 if stfips==47
*Combine Arkansas, Mississippi, and Louisiana
replace stfips_clust=5 if stfips==28 | stfips==22
* Combine Minnesot and Wisconsin
replace stfips_clust=27 if stfips==55

* drop counties with outlier values for available water storage
summ rootznaws, detail
drop if rootznaws<3.5

* drop if missing soils
list state county stcofips if clay_perc==.
drop if clay_perc==.

* Subsidy payments per acre
gen direct_pmts = DcpDpPmtTot09/(croplandNon_acres + acresh_Irr2012)
gen ln_direct_pmts = ln(direct_pmts) if direct_pmts>0
replace ln_direct_pmts = ln(0.01) if direct_pmts==0

* Proportion winter wheat
gen winter_wheat=winter_wheat_acres/(croplandNon_acres + acresh_Irr2012)
replace winter_wheat=0 if winter_wheat==.

*------------------------------------------------------------------------
* Potential soils
*------------------------------------------------------------------------
run "..\codeSetup\soils transform - program.do"
soils_tranform

save "..\dataAnalysis\rent_climate_data", replace





